{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Collect Tweets into MongoDB"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install Python libraries\n",
    "\n",
    "You may need to restart your Jupyter Notebook instance after installing these libraries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel\n",
    "%pip install pymongo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel;\n",
    "# the extras spec is quoted so the shell does not treat the brackets as a glob pattern\n",
    "%pip install \"pymongo[srv]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel\n",
    "%pip install dnspython"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Pinned to the last 3.x release: tweepy 4.0 removed tweepy.StreamListener,\n",
    "# which the streaming cell of this notebook subclasses.\n",
    "%pip install tweepy==3.10.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel\n",
    "%pip install twitter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Python libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pymongo\n",
    "from pymongo import MongoClient\n",
    "import json\n",
    "import tweepy\n",
    "import twitter\n",
    "from pprint import pprint\n",
    "import configparser\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Load the Authorization Info"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the database connection info and API keys in a config.ini file, then use configparser to load the authorization info."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "config = configparser.ConfigParser()\n",
    "# ConfigParser.read() silently skips files it cannot open and returns the list of\n",
    "# files it actually parsed, so an empty result means config.ini was not loaded.\n",
    "if not config.read('config.ini'):\n",
    "    raise FileNotFoundError(\"config.ini not found or unreadable in the working directory\")\n",
    "\n",
    "# Twitter API credentials from the [mytwitter] section (key names must match config.ini exactly)\n",
    "CONSUMER_KEY      = config['mytwitter']['api_key']\n",
    "CONSUMER_SECRET   = config['mytwitter']['api_secrete']\n",
    "OAUTH_TOKEN       = config['mytwitter']['access_token']\n",
    "OATH_TOKEN_SECRET = config['mytwitter']['access_secrete']\n",
    "\n",
    "# MongoDB connection string from the [mymongo] section\n",
    "mongod_connect = config['mymongo']['connection']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Connect to the MongoDB Cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'id_1'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Connect with the connection string loaded from config.ini\n",
    "client = MongoClient(mongod_connect)\n",
    "\n",
    "# Use (or create) the database \"demo\" and the collection \"tweet_collection\"\n",
    "db = client[\"demo\"]\n",
    "tweet_collection = db[\"tweet_collection\"]\n",
    "\n",
    "# Unique index on the tweet id, so the same tweet cannot be stored twice\n",
    "tweet_collection.create_index([(\"id\", pymongo.ASCENDING)], unique=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use the Streaming API to Collect Tweets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Authorize the Stream API "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# OAuth handler: application (consumer) credentials first, then the user's access token\n",
    "stream_auth = tweepy.OAuthHandler(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET)\n",
    "stream_auth.set_access_token(key=OAUTH_TOKEN, secret=OATH_TOKEN_SECRET)\n",
    "\n",
    "# API wrapper; its .auth attribute is what the stream is created with later on\n",
    "strem_api = tweepy.API(stream_auth)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define the query for the Stream API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keywords to track: tweets that contain \"election\"\n",
    "track = ['election']\n",
    "\n",
    "# Bounding box around Harrisonburg, VA:\n",
    "# south-west corner (longitude, latitude) followed by north-east corner (longitude, latitude)\n",
    "locations = [-78.9326449, 38.4150904, -78.8816972, 38.4450731]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The collected tweets will contain 'election' <span style=\"color:red;font-weight:bold\"> OR </span> be located in Harrisonburg, VA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "class MyStreamListener(tweepy.StreamListener):\n",
    "    \"\"\"Stream listener that stores every incoming tweet in tweet_collection.\"\"\"\n",
    "\n",
    "    def on_status(self, status):\n",
    "        \"\"\"Print the tweet id and insert the raw tweet JSON into MongoDB.\"\"\"\n",
    "        print(status.id_str)\n",
    "        try:\n",
    "            tweet_collection.insert_one(status._json)\n",
    "        except pymongo.errors.DuplicateKeyError:\n",
    "            # The unique index on \"id\" rejects tweets that are already stored.\n",
    "            # Any other error (connection loss, bad credentials, ...) is left to\n",
    "            # surface instead of being silently discarded.\n",
    "            pass\n",
    "\n",
    "    def on_error(self, status_code):\n",
    "        \"\"\"Stop streaming when Twitter answers with status code 420.\"\"\"\n",
    "        if status_code == 420:\n",
    "            # returning False from on_error disconnects the stream\n",
    "            return False\n",
    "\n",
    "\n",
    "myStreamListener = MyStreamListener()\n",
    "myStream = tweepy.Stream(auth=strem_api.auth, listener=myStreamListener)\n",
    "myStream.filter(track=track)  # or filter(locations=locations); use either track or locations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use the REST API to Collect Tweets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Authorize the REST API "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# twitter.oauth.OAuth takes the access token pair first, then the application (consumer) pair\n",
    "rest_auth = twitter.oauth.OAuth(\n",
    "    token=OAUTH_TOKEN,\n",
    "    token_secret=OATH_TOKEN_SECRET,\n",
    "    consumer_key=CONSUMER_KEY,\n",
    "    consumer_secret=CONSUMER_SECRET,\n",
    ")\n",
    "rest_api = twitter.Twitter(auth=rest_auth)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define the query for the REST API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# number of tweets returned per request (100 is the maximum the search API allows)\n",
    "count = 100\n",
    "# \"latitude,longitude,radius\": a 50-mile radius around Harrisonburg, VA\n",
    "geocode = \"38.4392897,-78.9412224,50mi\"\n",
    "# keyword the tweets must contain\n",
    "q = \"election\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The collected tweets will contain 'election' <span style=\"color:red;font-weight:bold\"> AND </span> be located in Harrisonburg, VA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# q and geocode can be combined in one search request\n",
    "search_results = rest_api.search.tweets(count=count, q=q, geocode=geocode)\n",
    "statuses = search_results[\"statuses\"]\n",
    "\n",
    "# The id of the last status on the page is the starting point for fetching further pages.\n",
    "# An empty result has no last status; fall back to 0, which equals the initial\n",
    "# since_id_old of the follow-up pagination loop, so that loop simply does not run.\n",
    "since_id_new = statuses[-1]['id'] if statuses else 0\n",
    "\n",
    "for statuse in statuses:\n",
    "    try:\n",
    "        tweet_collection.insert_one(statuse)\n",
    "    except pymongo.errors.DuplicateKeyError:\n",
    "        # already stored (unique index on \"id\"): skip it without printing;\n",
    "        # any other error is left to surface\n",
    "        continue\n",
    "    pprint(statuse['created_at'])  # print the date of the collected tweet"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Continue fetching earlier tweets with the same query. \n",
    "<p><span style=\"color:red;font-weight:bold\">YOU WILL REACH YOUR RATE LIMIT VERY FAST</span></p>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "since_id_old = 0\n",
    "# Page through the results: every request asks for tweets whose id is at or below\n",
    "# the last id seen so far (max_id is inclusive). The loop stops once a page no\n",
    "# longer moves that id, or once a page comes back empty.\n",
    "while since_id_new != since_id_old:\n",
    "    since_id_old = since_id_new\n",
    "    search_results = rest_api.search.tweets(count=count, q=q,\n",
    "                                            geocode=geocode, max_id=since_id_new)\n",
    "    statuses = search_results[\"statuses\"]\n",
    "    if not statuses:\n",
    "        # an empty page has no last element to take the next max_id from\n",
    "        break\n",
    "    since_id_new = statuses[-1]['id']\n",
    "    for statuse in statuses:\n",
    "        try:\n",
    "            tweet_collection.insert_one(statuse)\n",
    "        except pymongo.errors.DuplicateKeyError:\n",
    "            # already stored (unique index on \"id\"), e.g. the boundary tweet that an\n",
    "            # inclusive max_id returns again; any other error is left to surface\n",
    "            continue\n",
    "        pprint(statuse['created_at'])  # print the date of the collected tweet"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## View the Collected Tweets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print the number of tweets and unique twitter users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1086\n",
      "1038\n"
     ]
    }
   ],
   "source": [
    "# number of tweets collected (an estimate taken from the collection metadata)\n",
    "print(tweet_collection.estimated_document_count())\n",
    "\n",
    "# distinct() returns a list of the unique values, so len() gives the number of unique Twitter users\n",
    "user_cursor = tweet_collection.distinct(\"user.id\")\n",
    "print(len(user_cursor))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a text index and print the Tweets containing specific keywords. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'text_index'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a text index on the tweet text so the collection can be searched with $text\n",
    "tweet_collection.create_index(\n",
    "    [(\"text\", pymongo.TEXT)],\n",
    "    name='text_index',\n",
    "    default_language='english',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a cursor to query tweets with the created index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# $text searches the text index: returns the tweets that contain \"vote\"\n",
    "tweet_cursor = tweet_collection.find(\n",
    "    {\"$text\": {\"$search\": \"vote\"}}\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use pprint to display tweets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----\n",
      "name: mark\n",
      "text: RT @Mia4MD: Early Voting is happening NOW in Maryland for the 2020 Presidential Election. You can vote in an early voting center in the cou…\n",
      "----\n",
      "name: megaminimalist\n",
      "text: RT @Amy_Siskind: TEXAS‼️ - an astounding 7.8 million Texans have voted!  That’s 88% of all the 2016 votes. \n",
      "\n",
      "There is little mail-in voting…\n",
      "----\n",
      "name: Virginians4U\n",
      "text: KEEP Pres: TRUMP\n",
      "CHANGE Senate: GADE\n",
      "HOPE Congress GOOD or FREITAS or TAYLOR or BENJAMIN or ANDREWS \n",
      "VOTE VOTE VOTE… https://t.co/2ZjF6E0Blv\n",
      "----\n",
      "name: Antonio Licon\n",
      "text: #VOTE\n",
      "----\n",
      "name: jtevans\n",
      "text: #Vote\n",
      "----\n",
      "name: Toto\n",
      "text: RT @grantstern: Why is ⁦@MayorGimenez⁩ against social distancing during voting in #Miami?\n",
      "\n",
      "Because he knows that suppressing the vote will…\n",
      "----\n",
      "name: Kiran Riar\n",
      "text: RT @raquelmiranda89: Shelby County: you have today and tomorrow to early vote. That is it! After Thursday, your last chance to vote in pers…\n",
      "----\n",
      "name: Kirby Mendyk\n",
      "text: I'm not voting until a celebrity tweets a picture of their taint captioned with the cowardly virtue signal \"vOtE\".… https://t.co/qZF39npGeJ\n",
      "----\n",
      "name: 🇺🇸♥️DEPLORABLE🎃 REEGAN♥️🇺🇸100% AMERICAN GIRL!\n",
      "text: RT @kkjjPNW: You do not have to vote by mail in most states.  You have the right to vote IN PERSON. https://t.co/ZduOCM1Yhw.  In 7 states y…\n",
      "----\n",
      "name: Money Mitch\n",
      "text: RT @Carnage4Life: It’s easier for a white person to vote from space than for an African American or Latino to vote in their home zip code i…\n"
     ]
    }
   ],
   "source": [
    "# display the first 10 tweets from the query\n",
    "for document in tweet_cursor[0:10]:\n",
    "    print('----')\n",
    "    # pprint(document)  # use pprint to print the entire tweet document\n",
    "    try:\n",
    "        print('name:', document[\"user\"][\"name\"])  # user name\n",
    "        print('text:', document[\"text\"])          # tweet text\n",
    "    except UnicodeEncodeError:\n",
    "        # Only the failure the message describes is handled here (output streams that\n",
    "        # cannot encode emoji or other non-ASCII characters); anything else surfaces.\n",
    "        print(\"***error in encoding\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# run the query again to get a fresh cursor of the tweets that contain \"vote\"\n",
    "tweet_cursor = tweet_collection.find(\n",
    "    {\"$text\": {\"$search\": \"vote\"}}\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use pandas to display tweets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>created_at</th>\n",
       "      <th>id</th>\n",
       "      <th>id_str</th>\n",
       "      <th>text</th>\n",
       "      <th>source</th>\n",
       "      <th>truncated</th>\n",
       "      <th>in_reply_to_status_id</th>\n",
       "      <th>in_reply_to_status_id_str</th>\n",
       "      <th>in_reply_to_user_id</th>\n",
       "      <th>...</th>\n",
       "      <th>lang</th>\n",
       "      <th>timestamp_ms</th>\n",
       "      <th>quoted_status_id</th>\n",
       "      <th>quoted_status_id_str</th>\n",
       "      <th>quoted_status</th>\n",
       "      <th>quoted_status_permalink</th>\n",
       "      <th>extended_tweet</th>\n",
       "      <th>possibly_sensitive</th>\n",
       "      <th>metadata</th>\n",
       "      <th>display_text_range</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5f9970e6ea281243abe9e85f</td>\n",
       "      <td>Wed Oct 28 13:23:45 +0000 2020</td>\n",
       "      <td>1321442530572066817</td>\n",
       "      <td>1321442530572066817</td>\n",
       "      <td>RT @Mia4MD: Early Voting is happening NOW in M...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891425160</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5f9970eaea281243abe9e8c9</td>\n",
       "      <td>Wed Oct 28 13:23:49 +0000 2020</td>\n",
       "      <td>1321442549718945792</td>\n",
       "      <td>1321442549718945792</td>\n",
       "      <td>RT @Amy_Siskind: TEXAS‼️ - an astounding 7.8 m...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891429725</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5f9970efea281243abe9e939</td>\n",
       "      <td>Wed Oct 28 13:23:54 +0000 2020</td>\n",
       "      <td>1321442568459198467</td>\n",
       "      <td>1321442568459198467</td>\n",
       "      <td>KEEP Pres: TRUMP\\nCHANGE Senate: GADE\\nHOPE Co...</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891434193</td>\n",
       "      <td>1.321379e+18</td>\n",
       "      <td>1321378894327484416</td>\n",
       "      <td>{'created_at': 'Wed Oct 28 09:10:53 +0000 2020...</td>\n",
       "      <td>{'url': 'https://t.co/g07jyouj2s', 'expanded':...</td>\n",
       "      <td>{'full_text': 'KEEP Pres: TRUMP\n",
       "CHANGE Senate:...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5f997133ea281243abe9ea5a</td>\n",
       "      <td>Wed Oct 28 13:25:02 +0000 2020</td>\n",
       "      <td>1321442853244018690</td>\n",
       "      <td>1321442853244018690</td>\n",
       "      <td>#VOTE</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>und</td>\n",
       "      <td>1603891502091</td>\n",
       "      <td>1.321441e+18</td>\n",
       "      <td>1321440905098874882</td>\n",
       "      <td>{'created_at': 'Wed Oct 28 13:17:17 +0000 2020...</td>\n",
       "      <td>{'url': 'https://t.co/SiYtZzvyXq', 'expanded':...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5f9970e6ea281243abe9e868</td>\n",
       "      <td>Wed Oct 28 13:23:45 +0000 2020</td>\n",
       "      <td>1321442531532435457</td>\n",
       "      <td>1321442531532435457</td>\n",
       "      <td>#Vote</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>und</td>\n",
       "      <td>1603891425389</td>\n",
       "      <td>1.321180e+18</td>\n",
       "      <td>1321179867052986368</td>\n",
       "      <td>{'created_at': 'Tue Oct 27 20:00:01 +0000 2020...</td>\n",
       "      <td>{'url': 'https://t.co/VhdpN9qfYf', 'expanded':...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5f9970f5ea281243abe9e9b4</td>\n",
       "      <td>Wed Oct 28 13:24:00 +0000 2020</td>\n",
       "      <td>1321442594174455809</td>\n",
       "      <td>1321442594174455809</td>\n",
       "      <td>RT @grantstern: Why is ⁦@MayorGimenez⁩ against...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/android\" ...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891440324</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>5f9970eaea281243abe9e8c6</td>\n",
       "      <td>Wed Oct 28 13:23:49 +0000 2020</td>\n",
       "      <td>1321442549379334144</td>\n",
       "      <td>1321442549379334144</td>\n",
       "      <td>RT @raquelmiranda89: Shelby County: you have t...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891429644</td>\n",
       "      <td>1.316336e+18</td>\n",
       "      <td>1316335746702340097</td>\n",
       "      <td>{'created_at': 'Wed Oct 14 11:11:13 +0000 2020...</td>\n",
       "      <td>{'url': 'https://t.co/AbysBKfi2s', 'expanded':...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>5f9970f9ea281243abe9ea15</td>\n",
       "      <td>Wed Oct 28 13:24:04 +0000 2020</td>\n",
       "      <td>1321442610347737088</td>\n",
       "      <td>1321442610347737088</td>\n",
       "      <td>I'm not voting until a celebrity tweets a pict...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/android\" ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891444180</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>{'full_text': 'I'm not voting until a celebrit...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>5f997133ea281243abe9ea5b</td>\n",
       "      <td>Wed Oct 28 13:25:02 +0000 2020</td>\n",
       "      <td>1321442853617164289</td>\n",
       "      <td>1321442853617164289</td>\n",
       "      <td>RT @kkjjPNW: You do not have to vote by mail i...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891502180</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5f9970faea281243abe9ea39</td>\n",
       "      <td>Wed Oct 28 13:24:05 +0000 2020</td>\n",
       "      <td>1321442617138286594</td>\n",
       "      <td>1321442617138286594</td>\n",
       "      <td>RT @Carnage4Life: It’s easier for a white pers...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>en</td>\n",
       "      <td>1603891445799</td>\n",
       "      <td>1.320028e+18</td>\n",
       "      <td>1320027534789140480</td>\n",
       "      <td>{'created_at': 'Sat Oct 24 15:41:03 +0000 2020...</td>\n",
       "      <td>{'url': 'https://t.co/8u0q776vmb', 'expanded':...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        _id                      created_at  \\\n",
       "0  5f9970e6ea281243abe9e85f  Wed Oct 28 13:23:45 +0000 2020   \n",
       "1  5f9970eaea281243abe9e8c9  Wed Oct 28 13:23:49 +0000 2020   \n",
       "2  5f9970efea281243abe9e939  Wed Oct 28 13:23:54 +0000 2020   \n",
       "3  5f997133ea281243abe9ea5a  Wed Oct 28 13:25:02 +0000 2020   \n",
       "4  5f9970e6ea281243abe9e868  Wed Oct 28 13:23:45 +0000 2020   \n",
       "5  5f9970f5ea281243abe9e9b4  Wed Oct 28 13:24:00 +0000 2020   \n",
       "6  5f9970eaea281243abe9e8c6  Wed Oct 28 13:23:49 +0000 2020   \n",
       "7  5f9970f9ea281243abe9ea15  Wed Oct 28 13:24:04 +0000 2020   \n",
       "8  5f997133ea281243abe9ea5b  Wed Oct 28 13:25:02 +0000 2020   \n",
       "9  5f9970faea281243abe9ea39  Wed Oct 28 13:24:05 +0000 2020   \n",
       "\n",
       "                    id               id_str  \\\n",
       "0  1321442530572066817  1321442530572066817   \n",
       "1  1321442549718945792  1321442549718945792   \n",
       "2  1321442568459198467  1321442568459198467   \n",
       "3  1321442853244018690  1321442853244018690   \n",
       "4  1321442531532435457  1321442531532435457   \n",
       "5  1321442594174455809  1321442594174455809   \n",
       "6  1321442549379334144  1321442549379334144   \n",
       "7  1321442610347737088  1321442610347737088   \n",
       "8  1321442853617164289  1321442853617164289   \n",
       "9  1321442617138286594  1321442617138286594   \n",
       "\n",
       "                                                text  \\\n",
       "0  RT @Mia4MD: Early Voting is happening NOW in M...   \n",
       "1  RT @Amy_Siskind: TEXAS‼️ - an astounding 7.8 m...   \n",
       "2  KEEP Pres: TRUMP\\nCHANGE Senate: GADE\\nHOPE Co...   \n",
       "3                                              #VOTE   \n",
       "4                                              #Vote   \n",
       "5  RT @grantstern: Why is ⁦@MayorGimenez⁩ against...   \n",
       "6  RT @raquelmiranda89: Shelby County: you have t...   \n",
       "7  I'm not voting until a celebrity tweets a pict...   \n",
       "8  RT @kkjjPNW: You do not have to vote by mail i...   \n",
       "9  RT @Carnage4Life: It’s easier for a white pers...   \n",
       "\n",
       "                                              source  truncated  \\\n",
       "0  <a href=\"http://twitter.com/download/iphone\" r...      False   \n",
       "1  <a href=\"http://twitter.com/download/iphone\" r...      False   \n",
       "2  <a href=\"https://mobile.twitter.com\" rel=\"nofo...       True   \n",
       "3  <a href=\"https://mobile.twitter.com\" rel=\"nofo...      False   \n",
       "4  <a href=\"http://twitter.com/download/iphone\" r...      False   \n",
       "5  <a href=\"http://twitter.com/download/android\" ...      False   \n",
       "6  <a href=\"http://twitter.com/download/iphone\" r...      False   \n",
       "7  <a href=\"http://twitter.com/download/android\" ...       True   \n",
       "8  <a href=\"http://twitter.com/download/iphone\" r...      False   \n",
       "9  <a href=\"http://twitter.com/download/iphone\" r...      False   \n",
       "\n",
       "   in_reply_to_status_id in_reply_to_status_id_str  in_reply_to_user_id  ...  \\\n",
       "0                    NaN                      None                  NaN  ...   \n",
       "1                    NaN                      None                  NaN  ...   \n",
       "2                    NaN                      None                  NaN  ...   \n",
       "3                    NaN                      None                  NaN  ...   \n",
       "4                    NaN                      None                  NaN  ...   \n",
       "5                    NaN                      None                  NaN  ...   \n",
       "6                    NaN                      None                  NaN  ...   \n",
       "7                    NaN                      None                  NaN  ...   \n",
       "8                    NaN                      None                  NaN  ...   \n",
       "9                    NaN                      None                  NaN  ...   \n",
       "\n",
       "  lang   timestamp_ms quoted_status_id quoted_status_id_str  \\\n",
       "0   en  1603891425160              NaN                  NaN   \n",
       "1   en  1603891429725              NaN                  NaN   \n",
       "2   en  1603891434193     1.321379e+18  1321378894327484416   \n",
       "3  und  1603891502091     1.321441e+18  1321440905098874882   \n",
       "4  und  1603891425389     1.321180e+18  1321179867052986368   \n",
       "5   en  1603891440324              NaN                  NaN   \n",
       "6   en  1603891429644     1.316336e+18  1316335746702340097   \n",
       "7   en  1603891444180              NaN                  NaN   \n",
       "8   en  1603891502180              NaN                  NaN   \n",
       "9   en  1603891445799     1.320028e+18  1320027534789140480   \n",
       "\n",
       "                                       quoted_status  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  {'created_at': 'Wed Oct 28 09:10:53 +0000 2020...   \n",
       "3  {'created_at': 'Wed Oct 28 13:17:17 +0000 2020...   \n",
       "4  {'created_at': 'Tue Oct 27 20:00:01 +0000 2020...   \n",
       "5                                                NaN   \n",
       "6  {'created_at': 'Wed Oct 14 11:11:13 +0000 2020...   \n",
       "7                                                NaN   \n",
       "8                                                NaN   \n",
       "9  {'created_at': 'Sat Oct 24 15:41:03 +0000 2020...   \n",
       "\n",
       "                             quoted_status_permalink  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  {'url': 'https://t.co/g07jyouj2s', 'expanded':...   \n",
       "3  {'url': 'https://t.co/SiYtZzvyXq', 'expanded':...   \n",
       "4  {'url': 'https://t.co/VhdpN9qfYf', 'expanded':...   \n",
       "5                                                NaN   \n",
       "6  {'url': 'https://t.co/AbysBKfi2s', 'expanded':...   \n",
       "7                                                NaN   \n",
       "8                                                NaN   \n",
       "9  {'url': 'https://t.co/8u0q776vmb', 'expanded':...   \n",
       "\n",
       "                                      extended_tweet possibly_sensitive  \\\n",
       "0                                                NaN                NaN   \n",
       "1                                                NaN                NaN   \n",
       "2  {'full_text': 'KEEP Pres: TRUMP\n",
       "CHANGE Senate:...                NaN   \n",
       "3                                                NaN                NaN   \n",
       "4                                                NaN                NaN   \n",
       "5                                                NaN                NaN   \n",
       "6                                                NaN                NaN   \n",
       "7  {'full_text': 'I'm not voting until a celebrit...                NaN   \n",
       "8                                                NaN              False   \n",
       "9                                                NaN                NaN   \n",
       "\n",
       "   metadata  display_text_range  \n",
       "0       NaN                 NaN  \n",
       "1       NaN                 NaN  \n",
       "2       NaN                 NaN  \n",
       "3       NaN                 NaN  \n",
       "4       NaN                 NaN  \n",
       "5       NaN                 NaN  \n",
       "6       NaN                 NaN  \n",
       "7       NaN                 NaN  \n",
       "8       NaN                 NaN  \n",
       "9       NaN                 NaN  \n",
       "\n",
       "[10 rows x 37 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drain tweet_cursor into a list and load the collected items into a DataFrame\n",
    "tweet_df = pd.DataFrame(data=list(tweet_cursor))\n",
    "tweet_df.head(10)  # preview the first 10 rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7fca39a57be0>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAOfElEQVR4nO3df6zd9V3H8edbOrKNu9EytpumRctcMzU0TrhBFLOcO9Tww9iagIGQWRZM/QNm5zAB9w/7Z5EZux8Ss+QqxC6p3CFDS5zTkcoN+gfNWiQrUGcJVijU1qUMdpFk4t7+cb/Em3Iu7f1+z+H2+87zkTTnnM/3+/18Pu/7TV/n288959vITCRJtfzYSk9AkjR6hrskFWS4S1JBhrskFWS4S1JBq1Z6AgDnn39+btiwodWxr776Kuecc85oJ3SGqFqbdfVP1dr6Xtf+/fu/l5nvH7btjAj3DRs2sG/fvlbHzs3NMRgMRjuhM0TV2qyrf6rW1ve6IuI/ltrmsowkFWS4S1JBhrskFWS4S1JBhrskFWS4S1JBhrskFWS4S1JBhrskFXRGfEO1iwMvvMxNd3xjRcY+fNc1KzKuJJ2KV+6SVJDhLkkFGe6SVJDhLkkFGe6SVJDhLkkFGe6SVJDhLkkFGe6SVJDhLkkFGe6SVJDhLkkFGe6SVJDhLkkFnTLcI+LeiDgeEU8uajsvIh6OiEPN45qmPSLiTyLimYj4TkRcPM7JS5KGO50r978Arjyp7Q5gT2ZuBPY0rwGuAjY2f7YBXxnNNCVJy3HKcM/MR4ETJzVvBnY2z3cCWxa1fzUXPAasjoi1o5qsJOn0tF1zn8zMowDN4wea9nXA84v2O9K0SZLeRqP+b/ZiSFsO3TFiGwtLN0xOTjI3N9dqwMl3wW2bXm91bFdt53y65ufnxz7GSrCu/qlaW9W6oH24H4uItZl5tFl2Od60HwEuWLTfeuDFYR1k5gwwAzA1NZWDwaDVRO7etZsdB1bmv4I9fONgrP3Pzc3R9udyJrOu/qlaW9W6oP2yzEPA1ub5VmD3ovbfaj41cxnw8hvLN5Kkt88pL3kj4j5gAJwfEUeAO4G7gPsj4mbgOeC6Zve/A64GngH+G/jEGOYsSTqFU4Z7Zt6wxKYrhuybwC1dJyVJ6sZvqEpSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQZ3CPSJ+LyKeiognI+K+iHhnRFwYEXsj4lBEfC0izh7VZCVJp6d1uEfEOuB3ganMvAg4C7ge+DzwxczcCLwE3DyKiUqSTl/XZZlVwLsiYhXwbuAo8DHggWb7TmBLxzEkScsUmdn+4IjtwOeA14BvAduBxzLzQ832C4BvNlf2Jx+7DdgGMDk5ecns7GyrORw/8TLHXms3/642rTt3rP3Pz88zMTEx1jFWgnX1T9Xa+l7X9PT0/sycGrZtVdtOI2INsBm4EPg+8FfAVUN2HfrukZkzwAzA1NRUDgaDVvO4e9dudhxoXUYnh28cjLX/ubk52v5czmTW1T9Va6taF3Rblvll4N8z878y83+AB4FfBFY3yzQA64EXO85RkrRMXcL9OeCyiHh3RARwBfA08AhwbbPPVmB3tylKkpardbhn5l4WfnH6OHCg6WsGuB34dEQ8A7wPuGcE85QkLUOnxerMvBO486TmZ4FLu/QrSerGb6hKUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhLUkGGuyQVZLhL
UkGGuyQVZLhLUkGGuyQVZLhLUkGdwj0iVkfEAxHxrxFxMCJ+ISLOi4iHI+JQ87hmVJOVJJ2erlfuXwb+PjN/CvhZ4CBwB7AnMzcCe5rXkqS3Uetwj4j3Ah8F7gHIzB9m5veBzcDOZredwJauk5QkLU9kZrsDIz4CzABPs3DVvh/YDryQmasX7fdSZr5paSYitgHbACYnJy+ZnZ1tNY/jJ17m2GutDu1s07pzx9r//Pw8ExMTYx1jJVhX/1Stre91TU9P78/MqWHbuoT7FPAYcHlm7o2ILwOvAJ88nXBfbGpqKvft29dqHnfv2s2OA6taHdvV4buuGWv/c3NzDAaDsY6xEqyrf6rW1ve6ImLJcO+y5n4EOJKZe5vXDwAXA8ciYm0z8FrgeIcxJEkttA73zPxP4PmI+HDTdAULSzQPAVubtq3A7k4zlCQtW9f1jE8CuyLibOBZ4BMsvGHcHxE3A88B13UcQ5K0TJ3CPTOfAIat91zRpV9JUjd+Q1WSCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJakgw12SCjLcJamgzuEeEWdFxL9ExN82ry+MiL0RcSgivhYRZ3efpiRpOUZx5b4dOLjo9eeBL2bmRuAl4OYRjCFJWoZO4R4R64FrgD9vXgfwMeCBZpedwJYuY0iSli8ys/3BEQ8Afwi8B/h94Cbgscz8ULP9AuCbmXnRkGO3AdsAJicnL5mdnW01h+MnXubYa60O7WzTunPH2v/8/DwTExNjHWMlWFf/VK2t73VNT0/vz8ypYdtWte00In4NOJ6Z+yNi8EbzkF2Hvntk5gwwAzA1NZWDwWDYbqd0967d7DjQuoxODt84GGv/c3NztP25nMmsq3+q1la1LugQ7sDlwK9HxNXAO4H3Al8CVkfEqsx8HVgPvNh9mpKk5Wi95p6Zf5CZ6zNzA3A98I+ZeSPwCHBts9tWYHfnWUqSlmUcn3O/Hfh0RDwDvA+4ZwxjSJLewkgWqzNzDphrnj8LXDqKfiVJ7fgNVUkqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqyHCXpIIMd0kqqHW4R8QFEfFIRByMiKciYnvTfl5EPBwRh5rHNaObriTpdHS5cn8duC0zfxq4DLglIn4GuAPYk5kbgT3Na0nS26h1uGfm0cx8vHn+A+AgsA7YDOxsdtsJbOk6SUnS8kRmdu8kYgPwKHAR8Fxmrl607aXMfNPSTERsA7YBTE5OXjI7O9tq7OMnXubYa60O7WzTunPH2v/8/DwTExNjHWMlWFf/VK2t73VNT0/vz8ypYdtWde08IiaArwOfysxXIuK0jsvMGWAGYGpqKgeDQavx7961mx0HOpfRyuEbB2Ptf25ujrY/lzOZdfVP1dqq1gUdPy0TEe9gIdh3ZeaDTfOxiFjbbF8LHO82RUnScnX5tEwA9wAHM/MLizY9BGxtnm8FdrefniSpjS7rGZcDHwcORMQTTdtngLuA+yPiZuA54LpuU5QkLVfrcM/MfwaWWmC/om2/kqTu/IaqJBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBVkuEtSQYa7JBU0lnCPiCsj4rsR8UxE3DGOMSRJS1s16g4j4izgT4FfAY4A346IhzLz6VGPJWn0Ntzx
jTe13bbpdW4a0j5qh++6ZuxjDDOs5rfLuGoex5X7pcAzmflsZv4QmAU2j2EcSdISIjNH22HEtcCVmfnbzeuPAz+fmbeetN82YFvz8sPAd1sOeT7wvZbHnumq1mZd/VO1tr7X9ROZ+f5hG0a+LAPEkLY3vYNk5gww03mwiH2ZOdW1nzNR1dqsq3+q1la1LhjPsswR4IJFr9cDL45hHEnSEsYR7t8GNkbEhRFxNnA98NAYxpEkLWHkyzKZ+XpE3Ar8A3AWcG9mPjXqcRbpvLRzBqtam3X1T9XaqtY1+l+oSpJWnt9QlaSCDHdJKqjX4V71NgcRcTgiDkTEExGxb6Xn00VE3BsRxyPiyUVt50XEwxFxqHlcs5JzbGOJuj4bES805+2JiLh6JefYRkRcEBGPRMTBiHgqIrY37b0+Z29RV+/P2VJ6u+be3Obg31h0mwPghgq3OYiIw8BUZvb5yxUARMRHgXngq5l5UdP2R8CJzLyreVNek5m3r+Q8l2uJuj4LzGfmH6/k3LqIiLXA2sx8PCLeA+wHtgA30eNz9hZ1/SY9P2dL6fOVu7c56IHMfBQ4cVLzZmBn83wnC3/JemWJunovM49m5uPN8x8AB4F19PycvUVdZfU53NcBzy96fYQ6JyuBb0XE/uY2DdVMZuZRWPhLB3xgheczSrdGxHeaZZteLV2cLCI2AD8H7KXQOTupLih0zhbrc7if1m0OeuryzLwYuAq4pVkC0JnvK8BPAh8BjgI7VnY67UXEBPB14FOZ+cpKz2dUhtRV5pydrM/hXvY2B5n5YvN4HPhrFpagKjnWrIG+sRZ6fIXnMxKZeSwz/zczfwT8GT09bxHxDhYCcFdmPtg09/6cDauryjkbps/hXvI2BxFxTvMLHyLiHOBXgSff+qjeeQjY2jzfCuxewbmMzBvh1/gNenjeIiKAe4CDmfmFRZt6fc6WqqvCOVtKbz8tA9B8bOlL/P9tDj63wlPqLCI+yMLVOizcHuIv+1xXRNwHDFi4teox4E7gb4D7gR8HngOuy8xe/XJyiboGLPzzPoHDwO+8sU7dFxHxS8A/AQeAHzXNn2Fhfbq35+wt6rqBnp+zpfQ63CVJw/V5WUaStATDXZIKMtwlqSDDXZIKMtwlqSDDXZIKMtwlqaD/A0oUVervrMxPAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot a histogram of the favorite_count column\n",
    "tweet_df.loc[:, \"favorite_count\"].hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
